## Author of the R file: Shuqing Xu
## R file last updated: 8.8.2017
## Edited by: EM on 23.8.2017

## QTL Mapping
## Output: 

## 2017 RIL Example from a CSV
#################################################################################
################### Set-Up Workspace and Resource Files #########################
#################################################################################
setwd("C:/Users/mwang/Desktop/ggplot2/QTL MAPPING FILES/amfQTL3/385")
rm(list = ls())
## Clear the workspace of all files
#source("http://bioconductor.org/biocLite.R")
#biocLite("QTLRel")
#biocLite("plyr")
#biocLite("dplyr")
library(QTLRel)
library(plyr)
library(dplyr)

## Install and load the appropriate packages 

gdat <- read.table("AZxUT.NIATTr2.GTdata", 
                   sep = ",", 
                   header = T, 
                   check.names = F,
                   row.names = 1)
## Load AZxUT.NIATTr2.GTdata

gmap <- read.table("AZxUT.NIATTr2.linkagemap", 
                   header = T, 
                   check.names = F,
                   nrows = 100000)
## Load AZxUT.NIATTr2.linkagemap

ped <- read.table("F11PedgreeV2.txt", 
                  header = T, 
                  check.names = F)
## Load F11PedgreeV2.txt

file <- read.csv('compund2_log.csv', header = T)#here is loading traits data
head(file)
dim(file)
## Load the csv, try read.csv2 if the file does not look good (check the file!)
file2 <- read.csv2('Genotype_to_SampleID.csv', header = T)
file3 <- file2[ , 1:2]
colnames(file3) <- c("SampleIDmass", "Genotype")
merge <- merge(file, file3, by=c("SampleIDmass"))
write.csv(merge, file="merge.csv")
head(merge)

amf1 <- merge[ , c(2, 3)] %>% 
  group_by(Genotype) %>% 
  summarise(mean = mean(Normalization))
write.csv(amf1, file="amf1.csv")
head(amf1)
amf2 <- na.omit(amf1)
head(amf2)
dim(amf2)

write.table(amf2, file = "genotypeXtraits.txt", sep = "\t")

#mortality.txt
pdat<-read.table("genotypeXtraits.txt", sep="\t", header=T, check.names=F,row.names=1);


pdat <- read.table("genotypeXtraits.txt", 
                   sep = "\t", 
                   header = T, 
                   check.names = F,
                   row.names = 1)
head(pdat)
dim(pdat)

row.names(pdat) <- pdat$Genotype
length(row.names(pdat))

#################################################################################
############################## Impute the Genotypes #############################
#################################################################################
## Estimate the relationship among individuals and impute the genotypes
## This step can take really long...

colnames(ped)[1] <- "id"
id <- intersect(row.names(pdat), as.character(ped$id))
id
length(id)
idcf <- cic(ped, ids = id, df = 0, ask = FALSE, verbose = TRUE)

## Calculate Jacquard condensed identity coefficients
## Jacquard determined 9 condensed coefficients of identity
## This function determines if there are any within our dataset 
## Output is a cic object with what coefficients exist in the genotypic data
## Calculations based on a pedigree file
## ped = pedigree file
## ids = id list (exclusive list of those ids present in data and pedigree list, made above)
## df = 0; if there are no intermediate generations, df should be 0
## ask = FALSE; if TRUE, users are asked whether to proceed at each step
## verbose = TRUE; if TRUE, status messages are printed out about the function
## Notes: cic() will work even if there are not ids or they aren't matched up, 
## but this is not recommended, especially for a large dataset
gm <- genMatrix(idcf)
## Translates the above result 
## Gives a genetic matrix from the identity coefficients
## Results include:
## AA = additive genetic matrix
## DD = dominance genetic matrix
## AD, HH, MH = other genetic matrices
## ib = inbreeding coefficient
## Use this result in the QTL mapping code
sum(is.na(gm))
## Check that there are no NAs in the gm result

gdatImputed <- genoImpute(gdat, gmap, gr = 6, na.str = "-")
## Impute the data
## Only suitable for advanced intercross lines (AI)
## Unknown variable gr = generation that we are looking at; na.str = replace NAs with: -

save(ped, gdat, pdat, gmap, id, gm, gdatImputed, file = "QTLRIL.saved.RData")
## Save the imputed data

#################################################################################
############################### Clean Up the Data ###############################
#################################################################################
## Only select individuals that have all information available

load("QTLRIL.saved.RData")

id <- intersect(id, row.names(gdat))
length(id)
gdat <- gdat[id, ]
ped <- ped[ped$id%in%id, ]
head(ped)
pdat <- pdat[id, ]
head(pdat)

#################################################################################
################################## QTL Mapping ##################################
#################################################################################
## Load the QTL mapping function

source("Trait2QTL.R") 
head(pdat)
head(gdatImputed)
## For running an example, only use 1 boot strap. 
## For real analysis, use boot = 100 or boot = 500 to calculate confidence invertal.
## This will take quite a while to finish...
## By default, likelihood ratio tests are used for calculating significance.

## Map it, output is the expected graph
mortality.QTL <- Trait2QTL(pdata = pdat,
                           gdata = gdatImputed,
                           gm = gm,
                           filename = "amfcompound2log_boot_500",
                           gmap = gmap,
                           TraitID = "mean", 
                           boot = 500)

write.table(mortality.QTL, file = "amfcompound2log_boot_500.csv", sep=";")

file <- read.csv2("amfcompound2log_boot_500.csv", header = T)
dim(file)
## Load csv file that we just created
file$LOD <- as.numeric(as.character(file$LOD))
file$LOD 
file1 <- arrange(file, desc(LOD))
write.table(file1, file = "amfcompound2log_boot_500_LOD score.csv", sep=";")
## Sort by LOD value
file2 <- file1[1:115, ]#224
## Grab certain number of values from the top of the QTL selection list
file2$SC_ID1 <- lapply(strsplit(as.character(file2$SC_ID), "_"), "[", 1)
file2$SC_ID2 <- lapply(strsplit(as.character(file2$SC_ID), "_"), "[", 2)

require(gdata)
annot <- read.table("NIATTr2.AN6.txt", header = T)
annot$X12335 <- as.numeric(as.character(annot$X12335))
annot$X12823 <- as.numeric(as.character(annot$X12823))
colnames(annot)[1] <- "chromosome"
colnames(annot)[9] <- "genes"

file.create("AMF Gene Candidate Details.csv");
file.create("AMF Gene Candidates.csv");
for (i in 1:nrow(file2)){
  if (i == 1) {
    columns <- c("Chromosome", "Gene", "Start", "Stop", "GeneAnnotationResult", "Type", "GeneID", "QTLMappingTarget", "QTLMappingLODScore")
    write.table(t(as.matrix(columns)), file = "AMF Gene Candidates.csv", col.names = FALSE, quote = FALSE, sep = ";");
  }
  print(i
  );
  s <- as.numeric(file2$SC_ID2[i]);
  p <- file2$SC_ID1[i];
  flex <- 110000;#120000
  subset <- subset(annot, chromosome == p);
  subset1 <- subset(subset, X12335 > (s - flex));
  subset2 <- subset(subset1, X12823 < (s + flex));
  if (nrow(subset2) != 0) {
    subset2$Genenames <- lapply(strsplit(as.character(subset2$genes), ";"), "[", 2);
    subset3 <- subset2[ , c(1, 3, 4, 5, 10)];
    subset3$Name <- lapply(strsplit(as.character(subset3$Genenames), "="), "[", 1);
    subset3$GeneID <- lapply(strsplit(as.character(subset3$Genenames), "="), "[", 2);
    subset4 <- subset(subset3, Name == "Name");
    subset4$QTLMappingTarget <- file2$SC_ID[i];
    subset4$QTLMappingLODScore <- file2$LOD[i];
    subset3 <- as.matrix(subset3);
    subset4 <- as.matrix(subset4);
    write.table(subset3, file = "AMF Gene Candidate Details.csv", sep = ";", append = TRUE, 
                col.names = FALSE);
    write.table(subset4, file = "AMF Gene Candidates.csv", sep = ";", append = TRUE,
                col.names = FALSE);
  }
}

##-----------------------------------------------------------------------------##
##-----------------------------------------------------------------------------##
##-----------------------------------------------------------------------------##